// Start from a clean Results window and an empty session.
cls
clear all

// Project root: every relative path used below is resolved from here.
local dir "~/Dropbox/Research/Distinction Effect"
cd "`dir'"

// Load the OLE file as the master data set.
use "Data/Intermediates/OLE_2007_2016.dta", clear

// Attach the PILA file on the national* key variable(s).
// keep(master match) retains every master record, matched or not, and
// discards records that appear only in the PILA file; nogenerate avoids
// leaving a _merge variable behind.
merge 1:1 national* using "Data/Intermediates/Pila_2009_2016.dta", keep(master match) nogenerate

//Industry Codes
//For each year build a single code: the OLE code when present, otherwise
//the PILA code. Codes that do not appear in the CIIU lookup file are set
//to missing.
forvalues year = 2009/2016 {
	gen ciiu = ole_ciiu_`year'
	//capture: the fallback is skipped silently if the replace fails
	//(for instance when pila_ciiu_`year' is not in the data)
	capture replace ciiu = pila_ciiu_`year' if ciiu == .

	//Validate against the lookup table; lookup-only codes are not kept
	merge m:1 ciiu using "Data/Intermediates/IndystryCodes_CIIU3_31.dta", keepusing(ciiu) keep(master match)
	replace ciiu = . if _merge == 1
	drop _merge

	rename ciiu ciiu_`year'
}

//Drop records that have no valid industry code in any year
tempvar n_codes
egen `n_codes' = rownonmiss(ciiu_????)
drop if `n_codes' == 0
drop `n_codes'

//Establishment ID
//Clean the PILA firm identifier year by year and keep it as firm_id_<year>
//only for identifier types "NI" and "NIT".
tempvar raw_id
forvalues year = 2009/2016 {
	//Rebuild pila_firm_id_<year> keeping only the numeric characters
	//(sieve() is an egen function from the community-contributed egenmore package)
	rename pila_firm_id_`year' `raw_id'
	egen pila_firm_id_`year' = sieve(`raw_id'), keep(n)
	drop `raw_id'

	//Other identifier types are left with an empty firm_id
	gen firm_id_`year' = pila_firm_id_`year' if inlist(pila_firm_type_`year', "NI", "NIT")
}

//Real Earnings
//Nominal earnings of each year are divided by ipc_2007_<year>, a deflator
//equal to 1 in 2007 (presumably a consumer price index rebased to 2007 --
//confirm the source of these figures).
local ipc_2007_2007 = 1
local ipc_2007_2008 = 1.076828139
local ipc_2007_2009 = 1.098426412
local ipc_2007_2010 = 1.13313792
local ipc_2007_2011 = 1.175408824
local ipc_2007_2012 = 1.204103672
local ipc_2007_2013 = 1.227398951
local ipc_2007_2014 = 1.272292502
local ipc_2007_2015 = 1.358377044
local ipc_2007_2016 = 1.436439371
forvalues year = 2007/2016 {
	//OLE earnings are the primary source; only strictly positive,
	//non-missing values are used (missing values compare as larger than
	//any number in Stata, hence the explicit missing() test).
	//The division is done directly in double precision: going through an
	//intermediate variable of the default (float) type would round
	//earnings above roughly 16.7 million before deflating them.
	gen double earnings_`year' = ole_earnings_`year'/`ipc_2007_`year'' ///
		if ole_earnings_`year' > 0 & !missing(ole_earnings_`year')

	//PILA earnings are used as a fallback from 2009 onwards, only where
	//the OLE value did not produce a result
	if `year' > 2008 {
		replace earnings_`year' = pila_earnings_`year'/`ipc_2007_`year'' ///
			if earnings_`year' == . & pila_earnings_`year' > 0 & !missing(pila_earnings_`year')
	}
}

//Graduation Year: Postgraduate Degree (fst_g_gradsch -> year_gradsch)
//and Undergraduate Degree (fst_grad -> year_grad).
//The year is taken as the first four characters of the source variable
//written out as text, then converted back to a number.
local sources fst_g_gradsch fst_grad
local targets year_gradsch year_grad
forvalues k = 1/2 {
	local src : word `k' of `sources'
	local dst : word `k' of `targets'

	tostring `src', gen(`dst')
	replace `dst' = substr(`dst', 1, 4)
	destring `dst', replace
}

//Sample Restriction
//Require a known undergraduate graduation year, and drop anyone whose
//postgraduate graduation year is not strictly after it (records with no
//postgraduate year are kept, because missing compares as larger).
drop if missing(year_grad)
drop if year_gradsch <= year_grad
*drop if ole_birthdate == .

//Keep only what the firm ranking needs; 2007 and 2008 earnings are dropped
keep national_id* year_grad year_gradsch ole_birthdate earnings_???? firm_id_???? pila_firm_type_???? ciiu_????
drop earnings_2007 earnings_2008

//One row per person and year
reshape long earnings_ ciiu_ firm_id_ pila_firm_type_, i(national_id*) j(year)
rename (earnings_ firm_id_ pila_firm_type_ ciiu_) (earnings firm_id firm_type ciiu)

//Keep person-years with a firm identifier, from the undergraduate
//graduation year onwards, where the firm identifier differs from the
//person's own national_id
drop if firm_id == ""
drop if year < year_grad
drop if national_id == firm_id

/*
keep if firm_type == "NI"
drop if length(firm_id) > 10
drop if length(firm_id) < 8
*/

//Keep only establishments with at least `num_workers' different workers
//observed over the whole panel
local num_workers = 9 //4
preserve
	//One row per worker-establishment pair, then count rows per establishment
	duplicates drop national* firm_id, force
	contract firm_id, freq(n_workers)
	keep if n_workers >= `num_workers'
	keep firm_id
	tempfile big_firms
	save `big_firms'
restore

//Restrict the worker-year panel to the qualifying establishments
merge m:1 firm_id using `big_firms', keep(match) nogenerate

//Only worker-years with both an industry code and earnings are used
drop if ciiu == . | earnings == .

//Mode of Industry by Establishment
//When the mode does not exist (egen mode() returns missing), impute the
//industry code of the worker with the highest payment in the establishment
rename ciiu worker_ciiu
tempvar top_pay firm_code
egen ciiu = mode(worker_ciiu), by(firm_id)
egen double `top_pay' = max(earnings), by(firm_id)
replace ciiu = worker_ciiu if ciiu == . & earnings == `top_pay'
//Spread the imputed code to every row of the establishment; if several
//top-paid rows carry different codes, the largest code is used
egen `firm_code' = max(ciiu), by(firm_id)
replace ciiu = `firm_code'
drop worker_ciiu

//Establishment-year average earnings (ciiu is constant within establishment)
collapse (mean) earnings, by(year firm_id ciiu)

//Industry group used for ranking
//The code is turned into text, 3-character codes get a leading zero, and
//ciiu3 is the first TWO characters of the result.
//NOTE(review): the original header said "3-digit Industry Codes" but
//substr(ciiu, 1, 2) yields 2 characters -- confirm which level is intended.
tostring ciiu, replace
replace ciiu = "0" + ciiu if strlen(ciiu) == 3
gen ciiu3 = substr(ciiu, 1, 2)

//Create Rank: earnings rank within each year and industry group
bysort year ciiu3 : egen double rank_earnings = rank(earnings)

//Rescale within each cell as (rank - min rank) / max rank, which lies in [0, 1)
//NOTE(review): the denominator is the max rank, not (max - min), so the
//top establishment does not reach 1 -- confirm this is intended.
tempvar top_rank bottom_rank
by year ciiu3 : egen double `top_rank' = max(rank_earnings)
by year ciiu3 : egen double `bottom_rank' = min(rank_earnings)
replace rank_earnings = (rank_earnings - `bottom_rank')/`top_rank'
drop `top_rank' `bottom_rank'

//One row per establishment, with yearly earnings and yearly ranks side by side
rename earnings firm_earnings
reshape wide firm_earnings rank_earnings, i(firm_id) j(year)

//order
order firm_id* ciiu* firm_earnings* rank_earnings*

//Overall rank: rank establishments within industry group on the average
//of their yearly mean earnings, then rescale as (rank - min rank) / max rank.
//The track option ranks the lowest value 1 without averaging ties.
tempvar avg_pay top_rank bottom_rank
egen double `avg_pay' = rowmean(firm_earnings*)
bysort ciiu3 : egen double rank_earnings = rank(`avg_pay'), track
by ciiu3 : egen double `top_rank' = max(rank_earnings)
by ciiu3 : egen double `bottom_rank' = min(rank_earnings)
replace rank_earnings = (rank_earnings - `bottom_rank')/`top_rank'
//Drop the helpers explicitly so they are not part of the saved file
drop `top_rank' `bottom_rank' `avg_pay'

sort ciiu3 rank_earnings

save "Data/Finals/FirmIndustryRanking.dta", replace
